Context

Comme on a la variable continue du CO2 élevé, on se dit que les expressions différentielles deux à deux auront moins d’intérêt que des analyses par splines.

Import et normalisation des données

Données après correction des mix-up samples :

corrplot(cor(data), method = 'color')

tcc <- normalize(data, norm_method = 'tmm', iteration = FALSE)
conditions <- str_split_fixed(colnames(data), '_', 2)[,1]
tcc <- filter_low_counts(tcc, 10*length(conditions))
normalized_counts <- TCC::getNormalizedData(tcc)
draw_distributions(data) /
draw_distributions(normalized_counts)

draw_PCA(normalized_counts)

Differential expression analysis

DF <- 2
FDR <- 0.05

CO2 <- as.numeric(str_split_fixed(conditions, '\\.', 2)[,1])
CO2_spline <- ns(CO2, df = DF)

N <- str_split_fixed(conditions, '\\.', 2)[,2]

design <- model.matrix(~CO2_spline*N)
fit <- lmFit(normalized_counts, design) %>% 
  eBayes() 

CO2_coeffs <- 2:(1+DF)
N_coeff <- DF+2
CO2_N_coeffs <- (DF+3):ncol(design)

DEGs_CO2 <- rownames(topTable(fit, coef=CO2_coeffs, 
                        adjust.method = "fdr", 
                        p.value=FDR, number=20000))

DEGs_N <- rownames(topTable(fit, coef=N_coeff, 
                        adjust.method = "fdr", 
                        p.value=FDR, number=20000))

DEGs_CO2_N <- rownames(topTable(fit, coef=CO2_N_coeffs, 
                        adjust.method = "fdr", 
                        p.value=FDR, number=20000))
length(DEGs_CO2)
## [1] 2043
length(DEGs_N)
## [1] 1675
length(DEGs_CO2_N)
## [1] 5375

Drawing the expression levels of some genes of interest

genes <- sample(DEGs_CO2, 5)


draw_genes <- function(genes, normalized_counts, ncol=NULL){
  reshape2::melt(normalized_counts[genes,] , 
               quiet =T, value.name = "Expression") %>%
  rename(gene = Var1, condition = Var2) %>%
  separate(condition, into = c("CO2", "Nutrition", "rep")) %>%
  mutate(CO2 = as.numeric(CO2)) %>%
  ggplot(aes(x=CO2, y=Expression, color=Nutrition, fill = Nutrition)) +
  geom_point() + geom_smooth(method = "lm", se = TRUE,
                size = 1, alpha=0.1,
                formula = y ~ splines::ns(x, df = DF)) +
  facet_wrap(~gene, scales = 'free', ncol = ncol) + 
    theme_pubr() + scale_fill_brewer(palette="Accent")+ 
    scale_color_brewer(palette="Accent")+
    labs(title = "Normalized gene expression in CO2 gradient experiment", 
         subtitle = paste("Splines interpolation with DF=", DF))
}


draw_genes(genes, normalized_counts)

Clustering of DEGs

draw_clusters <- function(normalized_counts, genes, clustering, ncol=NULL){
  reshape2::melt(normalized_counts[genes,], 
               quiet =T, value.name = "Expression") %>%
  rename(gene = Var1, condition = Var2) %>%
  separate(condition, into = c("CO2", "Nutrition", "rep")) %>%
  mutate(CO2 = as.numeric(CO2),
         cluster = clustering$membership[gene]) %>%
  group_by(gene) %>%
  mutate(Expression= (Expression-mean(Expression))/sd(Expression)) %>%
    group_by(cluster, CO2, Nutrition, rep) %>%
    mutate(N_genes = n()) %>%
    unite(Cluster_label, cluster, N_genes, sep = ', N=') %>%
  ggplot(aes(x=CO2, y=Expression, color=Nutrition, fill = Nutrition)) +
  geom_point(alpha=0.01)+geom_smooth(method = "lm", se = TRUE,
                size = 1, alpha=0.1,
                formula = y ~ splines::ns(x, df = DF)) +
  facet_wrap(~Cluster_label, scales = 'free', ncol = ncol) + 
    theme_pubr() +scale_fill_brewer(palette="Accent")+ 
    scale_color_brewer(palette="Accent")+
    labs(title = "Normalized clusters expression in CO2 gradient experiment", 
         subtitle = paste("Splines interpolation with DF=", DF))
}

draw_clusters(normalized_counts, DEGs_CO2, 
              run_coseq(conds = conditions, data = normalized_counts, 
                        genes = DEGs_CO2, K = 6, transfo = "arcsin", 
                        model = "Normal", seed = 123)) + ggtitle("C02 DEGs : clusters profiles")
## ****************************************
## coseq analysis: Normal approach & arcsin transformation
## K = 6 to 6 
## Use seed argument in coseq for reproducible results.
## ****************************************

draw_clusters(normalized_counts, DEGs_N, 
              run_coseq(conds = conditions, data = normalized_counts, 
                        genes = DEGs_N, K = 4, transfo = "arcsin", 
                        model = "Normal", seed = 123)) + ggtitle("N DEGs : clusters profiles")
## ****************************************
## coseq analysis: Normal approach & arcsin transformation
## K = 4 to 4 
## Use seed argument in coseq for reproducible results.
## ****************************************

draw_clusters(normalized_counts, DEGs_CO2_N, 
              run_coseq(conds = conditions, data = normalized_counts, 
                        genes = DEGs_CO2_N, K = 9, transfo = "arcsin", 
                        model = "Normal", seed = 123)) + ggtitle("N*CO2 DEGs : clusters profiles")
## ****************************************
## coseq analysis: Normal approach & arcsin transformation
## K = 9 to 9 
## Use seed argument in coseq for reproducible results.
## ****************************************

Ontology enrichment of DEGs

bg <- convert_from_agi(rownames(normalized_counts))
## 
entrez <- convert_from_agi(DEGs_CO2)
go_CO2 <- enrich_go(entrez, bg)
## 
DT::datatable(go_CO2) 
entrez <- convert_from_agi(DEGs_N)
go_N <- enrich_go(entrez, bg)
DT::datatable(go_N)
entrez <- convert_from_agi(DEGs_CO2_N)
go_CO2_N <- enrich_go(entrez, bg)
DT::datatable(go_CO2_N)